<?php
/*******************************************************************
* Glype Proxy Script
*
* Copyright (c) 2008, http://www.glype.com/
*
* Permission to use this script is granted free of charge
* subject to the terms displayed at http://www.glype.com/downloads
* and in the LICENSE.txt document of the glype package.
*******************************************************************
* This is the parser for the proxy - changes the original 'raw'
* document so that everything (images, links, etc.) is rerouted to
* be downloaded via the proxy script instead of directly.
******************************************************************/

class parser {

   // State of the javascript parser, exactly as passed to the constructor.
   // JS() interprets it as follows:
   //   (array) - list of flags naming what must be parsed server-side
   //   other   - all browser-specific flags ('ajax', 'watch', 'setters') apply
   //   null    - as "other", plus the 'base' flag (eval(), .write(), etc.)
   private $jsFlagState;

   // Browsing options, read by key: 'stripTitle', 'stripJS', 'stripObjects'
   // and 'encodePage'.
   private $htmlOptions;

   // Constructor accepts options and saves them in the object
   public function __construct($htmlOptions, $jsFlags) {
      $this->jsFlagState = $jsFlags;
      $this->htmlOptions = $htmlOptions;
   }


   /*****************************************************************
   * HTML parsers - main parsing function splits up document into
   * component parts ('normal' HTML, scripts and styles)
   ******************************************************************/

   // Parse a complete HTML document.
   //   $input  - the raw document
   //   $insert - markup to add straight after <body> (prepended to the document
   //             if there is no <body> and no frameset); pass false to skip
   //   $inject - if truthy, the output of injectionJS() is added after <head>
   //             (or prepended if there is no <head>)
   //   $footer - markup to add before </body> (or appended if no </body>)
   // Returns the rewritten document.
   public function HTMLDocument($input, $insert='', $inject=false, $footer='') {

      //
      // Apply parsing that only needs to be done once..
      //

      // Remove titles if option is enabled
      if ( $this->htmlOptions['stripTitle'] ) {
         $input = preg_replace('#<title.*?</title>#is', '', $input, 1);
      }

      // Remove and record a <base> href
      $input = preg_replace_callback('#<base href\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_stripBase', $input, 1);

      // Proxify url= values in meta redirects
      $input = preg_replace_callback('#content\s*=\s*(["\\\'])?[0-9]+\s*;\s*url=([\\\'"]|&\#39;)?((?(?<=")[^"]+|(?(?<=\\\')[^\\\']+|[^\\\'" >]+)))(?(2)\\2|)(?(1)\\1|)#i', 'html_metaRefresh', $input, 1);

      // Process forms
      $input = preg_replace_callback('#<form([^>]*)>(.*?)</form>#is', 'html_form', $input);

      // Remove scripts blocks (avoids individual processing below)
      if ( $this->htmlOptions['stripJS'] ) {
         $input = preg_replace('#<script[^>]*>.*?</script>#is', '', $input);
      }


      //
      // Split up the document into its different types and parse them
      //

      // Build up new document into this var
      $new    = '';
      $offset = 0;

      // Find instances of script or style blocks
      while ( preg_match('#<(s(?:cript|tyle))[^>]*>#i', $input, $match, PREG_OFFSET_CAPTURE, $offset) ) {

         // What type of block is this?
         $block = strtolower($match[1][0]);

         // Start position of the block content (just after the opening tag)
         $innerStart = $match[0][1] + strlen($match[0][0]);

         // Find the position of the matching end tag
         $innerEnd = stripos($input, "</$block>", $innerStart);

         // Unterminated block: treat the rest of the document as its content.
         // Without this, a false offset would restart the search from 0 and
         // match the same opening tag forever.
         if ( $innerEnd === false ) {
            $innerEnd = strlen($input);
         }

         // Parse everything up till here (including the opening tag) as HTML
         $new .= $this->HTML(substr($input, $offset, $innerStart - $offset));

         // Find parsing function
         $parseFunction = $block == 'style' ? 'CSS' : 'JS' ;

         // Add the parsed block
         $new .= $this->$parseFunction(substr($input, $innerStart, $innerEnd - $innerStart));

         // Move offset to the end tag; it is parsed as HTML with the next chunk
         $offset = $innerEnd;

      }

      // And add the final chunk (between last script/style block and end of doc)
      $new .= $this->HTML(substr($input, $offset));

      // Replace input with the updated document
      $input = $new;

      // Encode the page
      if ( $this->htmlOptions['encodePage'] ) {
         $input = encodePage($input);
      }

      //
      // Now add our own code bits
      //

      // Insert our mini form after the <body>
      if ( $insert !== false ) {

         // Check for a frameset (strict test: position 0 is still a match)
         $useFrames = stripos($input, '<frameset') !== false;

         if ( $useFrames ) {

            // Flag the frames so only first displays mini-form
            $input = preg_replace_callback('#<frame[^>]+src\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_flagFrames', $input);

         }

         // Attempt to add after body
         $input = preg_replace('#(<body[^>]*>)#i', '$1' . $insert, $input, 1, $tmp);

         // Check it inserted and prepend if not (unless this is a frameset)
         if ( ! $tmp && ! $useFrames ) {
            $input = $insert . $input;
         }

      }

      // Insert our javascript library
      if ( $inject ) {

         // Generate javascript to insert
         $inject = injectionJS();

         // Add our proxy javascript after <head>
         $input = preg_replace('#(<head[^>]*>)#i', '$1' . $inject, $input, 1, $tmp);

         // If no <head>, just prepend
         if ( ! $tmp ) {
            $input = $inject . $input;
         }

      }

      // Add anything to the footer?
      if ( $footer ) {

         $input = preg_replace('#(</body[^>]*>)#i', $footer . '$1', $input, 1, $tmp);

         // If no </body>, just append the footer
         if ( ! $tmp ){
            $input .= $footer;
         }

      }

      // Return new document
      return $input;

   }

   // Parse a chunk of 'normal' HTML (no script or style content) and return
   // it with objects, event handlers, style attributes and URL attributes
   // processed according to the browsing options.
   public function HTML($input) {

      // Removing objects? Follow spec and display inner content of object tags instead.
      if ( $this->htmlOptions['stripObjects'] ) {

         // Remove all object tags (including those deprecated but still common)
         $input = preg_replace('#<(?>object|applet|param|embed)[^>]*>#i', '', $input, -1, $tmp);

         // Found any? Remove (at most) the same number of end tags
         if ( $tmp ) {
            $input = preg_replace('#</(?>object|applet|param|embed)>#i', '', $input, $tmp);
         }

      } else {

         // Parse <param name="movie" value="URL"> tags
         $input = preg_replace_callback('#<param[^>]+value\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i', 'html_paramValue', $input);

         // To do: proxify object related URLs

      }

      // Show content within <noscript> tags
      // (preg_ seems to be faster than 2 str_ireplace() calls)
      if ( $this->htmlOptions['stripJS'] ) {
         $input = preg_replace('#</?noscript>#i', '', $input);
      }

      // Parse onX events
      $input = preg_replace_callback('#\b(on(?<!\.on)[a-z]{2,20})\s*=\s*([\\\'"])?((?(2)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(2)\\2|)#i', array($this, 'html_eventJS'), $input);

      // Parse style attributes
      $input = preg_replace_callback('#style\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', array($this, 'html_elementCSS'), $input);

      // Proxify URL attributes - this is the bottleneck but optimized
      // as much as possible (or at least, as much as I can).
      $input = preg_replace_callback('#(?><[A-Z][A-Z0-9]{0,15})(?>\s+[^>\s]+)*?\s*(?>(href|src|background)\s*=(?!\\\\)\s*)(?>([\\\'"])?)((?(2)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^ >]{1,1000}))(?(2)\\2|)#i', 'html_attribute', $input);

      // Return changed input
      return $input;

   }

   // Callback for onX attributes: $input is the regex match
   // (1 = attribute name, 2 = quote or '', 3 = script). Returns the attribute
   // with its script parsed, or nothing at all when scripts are stripped.
   public function html_eventJS($input) {
      return $this->htmlOptions['stripJS'] ? '' : $input[1] . '=' . $input[2] . $this->JS($input[3]) . $input[2];
   }

   // Callback for style="CSS" attributes: $input is the regex match
   // (1 = quote or '', 2 = CSS). Returns the attribute with its CSS parsed.
   public function html_elementCSS($input) {
      return 'style=' . $input[1] . $this->CSS($input[2]) . $input[1];
   }


   /*****************************************************************
   * CSS parser - main parsing function
   * CSS parsing is complicated by the caching of CSS files. We need
   * to consider (A) cross-domain caching and (B) the unique URLs option.
   *   A) If possible, use a relative URL so the saved URLs do not explicitly
   *      point to a single domain.
   *   B) There is a second set of callback functions with "_unique" suffixed
   *      and these return the original URL to be reparsed.
   ******************************************************************/

   // Rewrite the URLs in a piece of CSS. With $storeUnique set, the
   // "_unique" callbacks are used instead of the normal proxifying ones.
   public function CSS($input, $storeUnique=false) {

      // What type of parsing is this? Normally we parse any URLs to redirect
      // back through the proxy but not when storing a cache with unique URLs.
      $type = $storeUnique ? '_unique' : '';

      // CSS needs proxifying the calls to url(), @import and src=''
      $input = preg_replace_callback('#\burl\s*\(\s*[\\\'"]?([^\\\'"\)]+)[\\\'"]?\s*\)#i', 'css_URL' . $type, $input);
      $input = preg_replace_callback('#@import\s*[\\\'"]([^\\\'"\(\)]+)[\\\'"]#i', 'css_import' . $type, $input);
      $input = preg_replace_callback('#\bsrc\s*=\s*([\\\'"])?([^)\\\'"]+)(?(1)\\1|)#i', 'css_src' . $type, $input);

      // Return changed
      return $input;

   }


   /*****************************************************************
   * Javascript parser - main parsing function
   *
   * The specific parts that need 'proxifying' depends on which javascript
   * functions we've been able to override. On first page load, the browser
   * capabilities are tested to see what we can do client-side and the results
   * sent back to us. This allows us to parse only what we have to.
   * If $CONFIG['override_javascript'] is disabled, all commands are parsed
   * server-side. This will use much more CPU!
   *
   * Commands to proxify only if no override at all:
   *   document.write()
   *   document.writeln()
   *   window.open()
   *   eval()
   *
   * Commands to proxify, regardless of browser capabilities:
   *   location.replace()
   *   .innerHTML=
   *
   * Commands to proxify if the extra "watch" flag is set
   * (the browser doesn't support the .watch() method):
   *   location=
   *   x.location=
   *   location.href=
   *
   * Commands to proxify if the extra "setters" flag is set
   * (the browser doesn't support the __defineSetter__() method):
   *   .src=
   *   .href=
   *   .background=
   *   .action=
   *
   * Commands to proxify if the extra "ajax" flag is set
   * (the browser failed to override the .open() method):
   *   XMLHttpRequest.open()
   ******************************************************************/

   // Rewrite a piece of javascript and return it. Returns an empty string
   // when the stripJS option is set.
   public function JS($input) {

      // Stripping?
      if ( $this->htmlOptions['stripJS'] ) {
         return '';
      }

      // Get our flags
      $flags = $this->jsFlagState;

      // Unless we know we don't need to, apply all the browser-specific flags
      if ( ! is_array($flags) ) {
         $flags = array('ajax', 'watch', 'setters');
      }

      // If override is disabled, add a "base" flag
      if ( $this->jsFlagState === null ) {
         $flags[] = 'base';
      }

      // Start parsing!
      $search = array();

      // Create shortcuts to various search patterns:
      //   "before"    - char that must precede the keyword [ignoring whitespace]
      //   "after"     - char that must follow the keyword [ignoring whitespace]
      //   "id"        - suffix identifying the match when a keyword has >1 pattern
      $assignmentPattern   = array('before' => '.', 'after' => '=');
      $methodPattern       = array('before' => '.', 'after' => '(');
      $functionPattern     = array('after' => '(');

      // Configure strings to search for, starting with always replaced commands.
      // Keyword order matters: when two keywords start at the same position
      // (writeln / write) the one registered first wins.
      $search['innerHTML'][] = $assignmentPattern;
      $search['location'][]  = array('after' => '.', 'id' => 'replace()');
         # ^ This is only for location.replace() - other forms are handled later

      // Look for attribute assignments
      if ( in_array('setters', $flags) ) {
         $search['src'][]        = $assignmentPattern;
         $search['href'][]       = $assignmentPattern;
         $search['action'][]     = $assignmentPattern;
         $search['background'][] = $assignmentPattern;
      }

      // Look for location changes
      // location.href will be handled above, location= is handled here
      if ( in_array('watch', $flags) ) {
         $search['location'][] = array('after' => '=', 'id' => 'assignment');
      }

      // Look for .open() if either AJAX (XMLHttpRequest.open) or
      // base (window.open) flags are present
      if ( in_array('ajax', $flags) || in_array('base', $flags) ) {
         $search['open'][] = $methodPattern;
      }

      // Add the basic code if no override
      if ( in_array('base', $flags) ) {
         $search['eval'][]    = $functionPattern;
         $search['writeln'][] = $methodPattern;
         $search['write'][]   = $methodPattern;
      }

      // Set up starting parameters
      $offset        = 0;
      $length        = strlen($input);
      $searchStrings = array_keys($search);

      while ( $offset < $length ) {

         // Start off by assuming no more items (i.e. the next position
         // of interest is the end of the document)
         $commandPos = $length;

         // Loop through the search subjects and keep the nearest one
         foreach ( $searchStrings as $item ) {

            // Any more instances of this?
            if ( ( $tmp = strpos($input, $item, $offset) ) === false ) {
               continue;
            }

            // Closer to the currently held 'next' position?
            if ( $tmp < $commandPos ) {
               $commandPos = $tmp;
               $command    = $item;
            }

         }

         // No matches found? Finish parsing.
         if ( $commandPos == $length ) {
            break;
         }

         // We've found the main point of interest; now use the
         // search parameters to check the surrounding chars to validate
         // the match.
         $valid = false;

         foreach ( $search[$command] as $pattern ) {

            // Check the preceding chars
            if ( isset($pattern['before']) && str_checkprev($input, $pattern['before'], $commandPos-1) === false ) {
               continue;
            }

            // Check next chars; $postCharPos ends up just past the "after"
            // char and any whitespace following it
            if ( isset($pattern['after']) && ( $postCharPos = str_checknext($input, $pattern['after'], $commandPos + strlen($command), false, true) ) === false ) {
               continue;
            }

            // Still here? Match must be OK so generate a match ID
            if ( isset($pattern['id']) ) {
               $valid = $command . $pattern['id'];
            } else {
               $valid = $command;
            }

            break;

         }

         // What we do next depends on which match (if any) we've found...
         switch ( $valid ) {

            // Assignment
            case 'src':
            case 'href':
            case 'background':
            case 'action':
            case 'locationassignment':
            case 'innerHTML':

               // Check our post-char position for = as well (could be equality
               // test rather than assignment, i.e. == )
               if ( ! isset($input[$postCharPos]) || $input[$postCharPos] == '=' ) {
                  break;
               }

               // Find the end of this statement
               $endPos = analyze_js($input, $postCharPos);
               $valueLength = $endPos - $postCharPos;

               // Produce replacement command
               $replacement = sprintf('parse%s(%s)', $command=='innerHTML' ? 'HTML' : 'URL', substr($input, $postCharPos, $valueLength));

               // Make the replacement
               $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

               // Re-measure the document rather than tracking the delta by hand
               $length = strlen($input);

               // Move offset to just past the wrapped value
               $offset = $postCharPos + strlen($replacement);

               // Go get next match
               continue 2;


            // Function calls - we don't know for certain if these are in fact members of the
            // appropriate objects (window/XMLHttpRequest for .open(), document for .write() and
            // .writeln) so we won't change anything. Main.js still overrides these functions but
            // does nothing with them by default. We add an extra parameter to tell our override
            // to kick in.
            case 'open':
            case 'write':
            case 'writeln':

               // Find the end position (the closing ")" for the function call)
               $endPos = analyze_js($input, $postCharPos);

               // Insert our additional argument just before that
               // NOTE(review): for a call with no arguments this produces
               // open(,"gl") - confirm whether that case needs handling.
               $input = substr_replace($input, ',"gl"', $endPos, 0);

               // Adjust the document length
               $length = strlen($input);

               // And move the offset past the inserted argument
               $offset = $endPos + 5;

               // Get next match
               continue 2;


            // Eval() is just as easy since we can just wrap the entire thing in parseJS().
            case 'eval':

               // Ensure this is a call to eval(), not anotherfunctionendingineval()
               // or $eval(). Position 0 has no preceding char to inspect.
               if ( $commandPos > 0 && preg_match('#[A-Za-z0-9_$]#', $input[$commandPos-1]) ) {
                  break;
               }

               // Find the end position (the closing ")" for the function call)
               $endPos = analyze_js($input, $postCharPos);
               $valueLength = $endPos - $postCharPos;

               // Generate our replacement
               $replacement = sprintf('parseJS(%s)', substr($input, $postCharPos, $valueLength));

               // Make the replacement
               $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

               // Adjust the document length
               $length = strlen($input);

               // And move the offset to just past the wrapped argument
               $offset = $postCharPos + strlen($replacement);
               continue 2;


            // location.replace() is a tricky one. We have the position of the char
            // after . as $postCharPos and need to ensure we're calling replace(),
            // then parse the entire URL
            case 'locationreplace()':

               // Validate the match (\G anchors the pattern at $postCharPos)
               if ( ! preg_match('#\Greplace\s*\(#', $input, $tmp, 0, $postCharPos) ) {
                  break;
               }

               // Move $postCharPos to inside the brackets of .replace()
               $postCharPos += strlen($tmp[0]);

               // Find the end position (the closing ")" for the function call)
               $endPos = analyze_js($input, $postCharPos);
               $valueLength = $endPos - $postCharPos;

               // Generate our replacement
               $replacement = sprintf('parseURL(%s)', substr($input, $postCharPos, $valueLength));

               // Make the replacement
               $input = substr_replace($input, $replacement, $postCharPos, $valueLength);

               // Adjust the document length
               $length = strlen($input);

               // And move the offset to just past the wrapped argument
               $offset = $postCharPos + strlen($replacement);

               continue 2;

         }

         // Still here? A match didn't validate so adjust offset to just after
         // current position
         $offset = $commandPos + 1;

      }

      // Ignore document.domain
      $input = str_replace('document.domain', 'ignore', $input);

      // Return changed
      return $input;

   }

}


/*****************************************************************
* HTML callbacks
******************************************************************/

// Callback for the <base> tag: stores the href (capture group 2) in the
// global $base and removes the tag from the document.
function html_stripBase($input) {
   $GLOBALS['base'] = $input[2];
   return '';
}

// Proxify the location of a meta refresh.
// $input is the regex match: 1 = quote around the content value (or ''),
// 2 = quote around the URL (or ''), 3 = the URL. The match always ends with
// the URL followed by those two closing quotes, so the URL is replaced by
// position. A plain str_replace() would also rewrite any other occurrence of
// the same text, e.g. the delay in content="0;url=0".
function html_metaRefresh($input) {

   $url       = $input[3];
   $proxified = proxifyURL($url);

   // Work backwards from the end of the match: [URL][quote 2][quote 1]
   $urlLength = strlen($url);
   $urlStart  = strlen($input[0]) - strlen($input[2]) - strlen($input[1]) - $urlLength;

   // Unexpected match shape? Fall back to replacing by value
   if ( $urlStart < 0 || substr($input[0], $urlStart, $urlLength) !== $url ) {
      return str_replace($url, $proxified, $input[0]);
   }

   return substr_replace($input[0], $proxified, $urlStart, $urlLength);
}

// Proxify URL in <param name="movie" value="URL">.
// $input is the regex match: 0 = the whole <param> tag, 1 = quote (or ''),
// 2 = the value. Tags that do not mention "movie" are returned untouched.
function html_paramValue($input) {

   // Check for a name="movie" tag
   if ( stripos($input[0], 'movie') === false ) {
      return $input[0];
   }

   $proxified = proxifyURL($input[2]);

   // Re-run the tag pattern with offsets so only the value itself is replaced.
   // A plain str_replace() would also rewrite other occurrences of the same
   // text, e.g. name="movie" value="movie".
   $pattern = '#<param[^>]+value\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)[^>]*>#i';

   if ( preg_match($pattern, $input[0], $located, PREG_OFFSET_CAPTURE) && $located[2][0] === $input[2] ) {
      return substr_replace($input[0], $proxified, $located[2][1], strlen($input[2]));
   }

   // Could not locate the value (unexpected input) - replace by value instead
   return str_replace($input[2], $proxified, $input[0]);
}

// Rewrite a <form>. The proxy script uses the query string itself, so every
// GET form (explicit, or implicit because no get/post method is given) is
// switched to POST and tagged with a hidden "convertGET" field.
// $input is the regex match: 1 = the form tag's attributes, 2 = the form body.
function html_form($input) {

   $attributes = $input[1];
   $body       = $input[2];
   $wasGet     = false;

   if ( ! preg_match('#\bmethod\s*=\s*["\\\']?(get|post)["\\\']?#i', $attributes, $method) ) {

      // No get/post method given and GET is the default: append a POST method
      $attributes .= ' method="post"';
      $wasGet      = true;

   } elseif ( strtolower($method[1]) != 'post' ) {

      // Explicit GET: swap the attribute for a POST one
      $attributes = str_replace($method[0], 'method="post"', $attributes);
      $wasGet     = true;

   }

   // To do: javascript onsubmit event to immediately redirect to the appropriate
   // location using GET data, without an intermediate POST to the proxy script.

   // Proxify the form action
   $attributes = preg_replace_callback('#\baction\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_formAction', $attributes);

   // Due to register_globals support, PHP converts a number of characters to _
   // in incoming variable names. The raw post data from php://input gets
   // around this but is not available for multipart forms, so the input
   // names of those forms are encoded instead.
   $multipart = stripos($attributes, 'multipart/form-data');

   if ( $multipart ) {
      $body = preg_replace_callback('#name\s*=\s*([\\\'"])?((?(1)(?(?<=")[^"]{1,1000}|[^\\\']{1,1000})|[^\s"\\\'>]{1,1000}))(?(1)\\1|)#i', 'html_inputName', $body);
   }

   // Marker field, present only on converted forms
   $marker = $wasGet ? '<input type="hidden" name="convertGET" value="1">' : '';

   // Return updated form
   return '<form' . $attributes . '>' . $marker . $body . '</form>';

}

// Callback for a form's action attribute: rebuilds action="URL" with the
// URL (group 2) proxified and the original quote (group 1) kept.
function html_formAction($input) {
   $quote  = $input[1];
   $target = proxifyURL($input[2]);
   return 'action=' . $quote . $target . $quote;
}

// Callback for name attributes inside multipart forms: rebuilds name="X"
// with the name (group 2) passed through inputEncode() and the original
// quote (group 1) kept.
function html_inputName($input) {
   $quote   = $input[1];
   $encoded = inputEncode($input[2]);
   return 'name=' . $quote . $encoded . $quote;
}

// Proxify URL values in attributes.
// $input is the regex match: 0 = everything from "<tag" up to the end of the
// attribute value (including its closing quote), 1 = attribute name,
// 2 = quote (or ''), 3 = the URL.
function html_attribute($input) {

   // Is this an iframe? (tag name directly after the "<")
   $flag = stripos($input[0], 'iframe') === 1 ? 'frame' : '';

   // URL occurred as value of an attribute and should have been htmlspecialchar()ed
   // We need to do the job of the browser and decode before proxifying.
   $proxified = htmlspecialchars(proxifyURL(htmlspecialchars_decode($input[3]), $flag));

   // The match ends with [URL][closing quote], so replace by position. A plain
   // str_replace() would also rewrite the same text anywhere earlier in the
   // tag, e.g. the tag name in <a href="a"> or the title in
   // <a title="a/b" href="/">.
   $urlLength = strlen($input[3]);
   $urlStart  = strlen($input[0]) - strlen($input[2]) - $urlLength;

   // Unexpected match shape? Fall back to replacing by value
   if ( $urlStart < 0 || substr($input[0], $urlStart, $urlLength) !== $input[3] ) {
      return str_replace($input[3], $proxified, $input[0]);
   }

   return substr_replace($input[0], $proxified, $urlStart, $urlLength);
}

// Flag frames in a frameset so only the first one shows the mini-form.
// This could be done in the html_attribute callback but adds extra
// processing when 99% of the time, it won't be needed.
// $input is the regex match: 0 = "<frame ... src=URL" including the closing
// quote, 1 = quote (or ''), 2 = the URL. The first call of the request
// returns its frame untouched; every later call appends the frame flag.
function html_flagFrames($input) {

   static $addFlag;

   // If it's the first frame, leave it but set the flag var
   if ( ! isset($addFlag) ) {
      $addFlag = true;
      return $input[0];
   }

   // Add the frame flag (query string form if the URL has one)
   $newURL = $input[2] . ( strpos($input[2], '?') ? '&f=frame' : 'fframe/');

   // The match ends with [URL][closing quote], so replace by position. A
   // plain str_replace() would also rewrite the same text elsewhere in the
   // tag, e.g. name="x" src="x".
   $urlLength = strlen($input[2]);
   $urlStart  = strlen($input[0]) - strlen($input[1]) - $urlLength;

   // Unexpected match shape? Fall back to replacing by value
   if ( $urlStart < 0 || substr($input[0], $urlStart, $urlLength) !== $input[2] ) {
      return str_replace($input[2], $newURL, $input[0]);
   }

   return substr_replace($input[0], $newURL, $urlStart, $urlLength);

}


/*****************************************************************
* CSS callbacks
******************************************************************/

// Callback for CSS url(LOCATION): returns url() around the proxified
// location (group 1, with surrounding whitespace trimmed).
function css_URL($input) {
   $location = trim($input[1]);
   return sprintf('url(%s)', proxifyURL($location));
}

// Callback for CSS @import "URL": returns the import with the URL (group 1)
// proxified, always using double quotes.
function css_import($input) {
   return sprintf('@import "%s"', proxifyURL($input[1]));
}

// Callback for CSS src=: rebuilds src=URL with the URL (group 2) proxified
// and the original quote (group 1) kept.
function css_src($input) {
   $quote  = $input[1];
   $target = proxifyURL($input[2]);
   return 'src=' . $quote . $target . $quote;
}

// Callbacks for use with unique URLs and cached CSS
// The <UNIQUE[]URL> acts as a marker for quick and easy processing later

// Unique CSS url(LOCATION): returns url() around a <UNIQUE[...]URL> marker
// holding the absolute form of the location (group 1).
// The location is trimmed first, as css_URL() does, because the url()
// pattern can capture trailing whitespace in an unquoted url( x ).
function css_URL_unique($input) {
   return 'url(<UNIQUE[' . absoluteURL(trim($input[1]),'') . ']URL>)';
}

// Unique CSS @import "URL": returns the import around a <UNIQUE[...]URL>
// marker holding the absolute form of the URL (group 1).
function css_import_unique($input) {
   $absolute = absoluteURL($input[1]);
   return '@import "<UNIQUE[' . $absolute . ']URL>"';
}

// Unique CSS src=: rebuilds src=URL around a <UNIQUE[...]URL> marker holding
// the absolute form of the URL (group 2), keeping the original quote (group 1).
function css_src_unique($input) {
   $quote  = $input[1];
   $marker = '<UNIQUE[' . absoluteURL($input[2]) . ']URL>';
   return 'src=' . $quote . $marker . $quote;
}


/*****************************************************************
* Helper functions
******************************************************************/

// Look at the first non-whitespace char at or after $offset and compare it
// with $char.
//   Default:   returns the position of that char if it is $char, else false.
//   $inverse:  the char must NOT be $char; returns its position if it is
//              something else, false if it is $char.
//   $pastChar: (non-inverse only) instead of the position of $char, returns
//              the position just after $char and any whitespace following it.
// Returns false if only whitespace (or nothing) remains.
function str_checknext($input, $char, $offset, $inverse = false, $pastChar = false) {

   $whitespace = " \t\r\n";
   $total      = strlen($input);

   for ( $pos = $offset; $pos < $total; ++$pos ) {

      $current = $input[$pos];

      // Step over whitespace
      if ( $current == ' ' || $current == "\t" || $current == "\r" || $current == "\n" ) {
         continue;
      }

      // Anything other than the wanted char decides the outcome immediately
      if ( $current != $char ) {
         return $inverse ? $pos : false;
      }

      // Found the wanted char, which is a failure when inverted
      if ( $inverse ) {
         return false;
      }

      // Plain lookup: report where the char is
      if ( ! $pastChar ) {
         return $pos;
      }

      // Skip the char itself plus any whitespace directly after it
      $next = $pos + 1;
      return $next + strspn($input, $whitespace, $next);

   }

   return false;

}


// Backwards counterpart of str_checknext(): look at the first non-whitespace
// char at or before $offset and compare it with $char.
//   Default:  returns the position of that char if it is $char, else false.
//   $inverse: returns the position if the char is NOT $char, false if it is.
// If only whitespace (or nothing) precedes $offset, returns $inverse.
function str_checkprev($input, $char, $offset, $inverse = false) {

   // Walk back to and including index 0, so a char at the very start of the
   // string is examined too
   for ( $i = $offset; $i >= 0; --$i ) {

      // Examine char
      switch ( $input[$i] ) {

         // Ignore whitespace
         case ' ':
         case "\t":
         case "\r":
         case "\n":
            break;

         // Found char
         case $char:
            return $inverse ? false : $i;

         // Found non-$char char
         default:
            return $inverse ? $i : false;

      }

   }

   return $inverse;

}


// Analyze javascript and return offset positions.
// Default is to find the end of the statement starting at $start, indicated by:
//  (1) ; while not in string
//  (2) newline which, if not there, would create invalid syntax
//  (3) a closing bracket (object, language construct or function call) for which
//      no corresponding opening bracket was detected AFTER the passed offset
// If nothing ends the statement, the end of the document is used.
// If (int) $argPos is true, we return an array of the start and end position
// for the nth argument, where n = $argPos. The $start position must be just inside
// the parenthesis of the function call we're interested in.
// Otherwise the return value is the (int) end position.
// Comments and regex literals are not recognised by this scanner.
function analyze_js($input, $start, $argPos = false) {

   // Set chars we're interested in
   $specialChars = ";\n\r\"'+{}()[]";

   // Add , if looking for an argument position
   if ( $argPos ) {
      $specialChars .= ',';
      $currentArg = 1;
   }

   $length = strlen($input);
   $end    = false;
   $openObjects = $openBrackets = $openArrays = 0;

   // Loop through the input, stopping only at special chars
   for ( $i = $start; $end === false; ++$i ) {

      // Jump straight to the next special char (position 0 is a valid stop)
      $i += strcspn($input, $specialChars, $i);

      if ( $i >= $length ) {
         break;
      }

      $char = $input[$i];

      switch ( $char ) {

         // Starting string delimiters
         case '"':
         case "'":

            // A delimiter directly preceded by a backslash does not open a string
            if ( $i > 0 && $input[$i-1] == '\\' ) {
               break;
            }

            // Skip straight to end of string: the next delimiter preceded by
            // an even number of backslashes (an odd count escapes the
            // delimiter, an even count is just escaped backslashes)
            $close = $i;

            while ( ( $close = strpos($input, $char, $close + 1) ) !== false ) {

               $slashes = 0;

               for ( $j = $close - 1; $j > $i && $input[$j] == '\\'; --$j ) {
                  ++$slashes;
               }

               if ( $slashes % 2 == 0 ) {
                  break;
               }

            }

            // No end delimiter, in which case we assume the end is the end of the doc
            if ( $close === false ) {
               break 2;
            }

            $i = $close;
            break;

         // End of operation?
         case ';':
            $end = $i;
            break;

         // New lines
         case "\n":
         case "\r":
            // Newlines are OK if occurring within open brackets, arrays or objects
            // (and always when looking for an argument).
            if ( $openObjects || $openBrackets || $openArrays || $argPos ) {
               break;
            }

            // Newlines are also OK if followed by an opening function OR concatenation
            // e.g. someFunc\n(params) or someVar \n + anotherVar
            // $tmp is the position of the last whitespace char after the newline
            $tmp = $i + strspn($input, " \t\r\n", $i+1);

            // And compare the char after it to the allowed chars
            if ( isset($input[$tmp+1]) && ( $input[$tmp+1] == '(' || $input[$tmp+1] == '+' ) ) {
               $i = $tmp;
               break;
            }

            // Newline not indicated as OK, set the end to here
            $end = $i;
            break;

         // Concatenation
         case '+':
            // Our interest in the + operator is its use in allowing an expression
            // to span multiple lines. If we come across a +, move past all whitespace,
            // including newlines (which would otherwise indicate end of expression).
            $i += strspn($input, " \t\r\n", $i+1);
            break;

         // Opening chars (objects, parenthesis and arrays)
         case '{':
            ++$openObjects;
            break;
         case '(':
            ++$openBrackets;
            break;
         case '[':
            ++$openArrays;
            break;

         // Closing chars - is there a corresponding open char?
         // Yes = reduce stored count. No = end of statement.
         case '}':
            $openObjects   ? --$openObjects   : $end = $i;
            break;
         case ')':
            $openBrackets  ? --$openBrackets  : $end = $i;
            break;
         case ']':
            $openArrays    ? --$openArrays    : $end = $i;
            break;

         // Commas - tell us which argument it is
         // (only reachable when $argPos is set, see $specialChars above)
         case ',':

            // Ignore commas inside other functions or whatnot
            if ( $openObjects || $openBrackets || $openArrays ) {
               break;
            }

            // End now
            if ( $currentArg == $argPos ) {
               $end = $i;
            }

            // Increase the current argument number
            ++$currentArg;

            // If we're not after the first arg, start now?
            if ( $currentArg == $argPos ) {
               $start = $i+1;
            }

            break;

      }

   }

   // End not found? Use end of document
   if ( $end === false ) {
      $end = $length;
   }

   // Return array of start/end
   if ( $argPos ) {
      return array($start, $end);
   }

   // Return end
   return $end;

}

/*****************************************************************
* Page encoding functions
******************************************************************/

// Encode page - splits into HTML/script sections and encodes the HTML.
// Script blocks are copied through untouched; every stretch of markup
// between them is passed to encodeBlock(). When the document has scripts,
// everything up to and including the <head> tag is also left unencoded.
// Returns the encoded page.
function encodePage($input) {

   // No scripts is easy - just encode the lot
   if ( ! preg_match_all('#<script.*?</script>#is', $input, $scripts, PREG_OFFSET_CAPTURE) ) {
      return encodeBlock($input);
   }

   // Create starting offset - only start encoding after the <head>
   // as this seems to help browsers cope!
   $offset = preg_match('#<head[^>]*>(.)#is', $input, $tmp, PREG_OFFSET_CAPTURE) ? $tmp[1][1] : 0;
   $new    = $offset ? substr($input, 0, $offset) : '';

   // Go through all the script blocks
   foreach ( $scripts[0] as $match ) {

      $scriptStart = $match[1];
      $scriptEnd   = $scriptStart + strlen($match[0]);

      // Script lies entirely in the part already copied? Nothing to do.
      if ( $scriptEnd <= $offset ) {
         continue;
      }

      if ( $scriptStart > $offset ) {

         // Encode the complete non-script block preceding this script
         $new .= encodeBlock(substr($input, $offset, $scriptStart - $offset));

      } else {

         // Script began inside the part already copied: add only the rest
         $scriptStart = $offset;

      }

      // Add unencoded script to page
      $new .= substr($input, $scriptStart, $scriptEnd - $scriptStart);

      // Move offset up
      $offset = $scriptEnd;

   }

   // Add final block (compared strictly so a lone "0" is not dropped)
   $remainder = substr($input, $offset);

   if ( $remainder !== false && $remainder !== '' ) {
      $new .= encodeBlock($remainder);
   }

   // Return the encoded page
   return $new;

}

// Encode block - applies the actual encoding (or rather "escaping").
// Returns a <script> that document.write()s the unescape()d block. Escaped
// are all ASCII letters, the single quote (it delimits the javascript
// string), CR and LF (not allowed inside the string literal), "-", and also
// "%" and "\": an unescaped "%" lets unescape() decode sequences such as %20
// that were part of the content, and an unescaped "\" is consumed by the
// javascript string literal.
function encodeBlock($input) {

   static $map = null;

   // Build the escape map once per request
   if ( $map === null ) {

      $map = array(
         '%'  => '%25',
         '\\' => '%5c',
         '\'' => '%27',
         "\r" => '%0d',
         "\n" => '%0a',
         '-'  => '%2D',
      );

      // a-z and A-Z become %61-%7a and %41-%5a
      foreach ( array_merge(range('a', 'z'), range('A', 'Z')) as $letter ) {
         $map[$letter] = '%' . dechex(ord($letter));
      }

   }

   // strtr() translates in a single pass, so the "%" of one escape sequence
   // is never escaped again by another rule
   return '<script type="text/javascript">document.write(unescape(\'' . strtr($input, $map) . '\'));</script>';

}
